# =============================================================================
# APPENDIX K FIGURE 16A: KIM & SUDDUTH ADDITIONAL ROBUSTNESS
# Extended robustness analysis including false positive effects
# Author: [Your Name]
# Last modified: [Date]
# =============================================================================

# --- PACKAGES ----------------------------------------------------------------
library(dplyr)
library(haven)
library(sandwich)
library(ggplot2)
library(reshape2)
library(lmtest)
library(stargazer)

# --- DATA LOADING ------------------------------------------------------------
data <- read_dta("kim_sudduth.dta")
est <- read.csv("estimates_independent.csv")

# --- DATA PREPARATION --------------------------------------------------------
# Ensure consistent identifiers
data$COWcode <- data$cowcode

# Merge and create lagged variables
data_new <- inner_join(est, data, by = c("COWcode", "year")) %>%
  arrange(COWcode, year) %>%
  group_by(COWcode) %>%
  mutate(
    dyn.estimates_lag = lag(dyn.estimates, n = 1),
    dyn.up_lag = lag(dyn.up, n = 1),
    dyn.lo_lag = lag(dyn.lo, n = 1)
  ) %>%
  ungroup()

# Create exact matching sample
data_new_exact <- data_new %>% filter(!is.na(dyn.estimates_lag))

data_used_in_models <- data_new_exact %>% 
  filter(complete.cases(dyn.estimates_lag, anycoupatt_fin, gwf_leadermil, 
                        ll_rgdpe_PW, lag_growth_rgdpe_PW, ll_pop_PW, postcold, anyattyrs))

# --- HELPER FUNCTIONS --------------------------------------------------------
get_clustered_se <- function(model, cluster_var) {
  vcov_cluster <- vcovHC(model, type = "HC1", cluster = cluster_var)
  coeftest(model, vcov_cluster)
}

leave_one_out <- function(model_formula, data) {
  n <- nrow(data)
  coef_results <- numeric(n)
  
  for (i in 1:n) {
    temp_data <- data[-i, ]
    model <- glm(model_formula, data = temp_data, family = "binomial")
    coef_results[i] <- coef(model)["dyn.estimates_lag"]
  }
  
  return(coef_results)
}

compare_coefficients <- function(coef_vector, original_coef, model_name) {
  h <- hist(coef_vector, main = paste("", model_name),
            xlab = "Coefficient Estimate",
            col = "gray", border = "white", breaks = 20)
  
  abline(v = original_coef, col = "blue", lwd = 2, lty = 2)
  
  y_max <- max(h$counts)
  text(x = original_coef, y = y_max * 0.9, 
       labels = paste0("Full Sample:\n", round(original_coef, 4)),
       col = "blue", pos = 4, cex = 0.8, xpd = TRUE)
  
  cat(paste0("\n", model_name, ":\n"))
  cat("Original coefficient: ", round(original_coef, 4), "\n")
  cat("Mean of leave-one-out coefficients: ", round(mean(coef_vector), 4), "\n")
  cat("Min of leave-one-out coefficients: ", round(min(coef_vector), 4), "\n")
  cat("Max of leave-one-out coefficients: ", round(max(coef_vector), 4), "\n")
  cat("Range of leave-one-out coefficients: ", round(range(coef_vector), 4), "\n")
}

# --- ANALYSIS ----------------------------------------------------------------
# Baseline models (restricted sample)
m2restricted <- glm(anycoupatt_fin ~ gwf_legislature + gwf_leadermil + ll_rgdpe_PW + 
                      lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(anyattyrs, 3), 
                    data = data_used_in_models, family = "binomial")

m5restricted <- glm(reshuffleatt_fin ~ gwf_legislature + gwf_leadermil + ll_rgdpe_PW + 
                      lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(shuffattyrs, 3), 
                    data = data_used_in_models, family = "binomial")

m8restricted <- glm(rechangeatt_fin ~ gwf_legislature + gwf_leadermil + ll_rgdpe_PW + 
                      lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(rechattyrs, 3), 
                    data = data_used_in_models, family = "binomial")

# New models with latent measure
m2new <- glm(anycoupatt_fin ~ dyn.estimates_lag + gwf_leadermil + ll_rgdpe_PW + 
               lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(anyattyrs, 3), 
             data = data_new, family = "binomial")

m5new <- glm(reshuffleatt_fin ~ dyn.estimates_lag + gwf_leadermil + ll_rgdpe_PW + 
               lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(shuffattyrs, 3), 
             data = data_new, family = "binomial")

m8new <- glm(rechangeatt_fin ~ dyn.estimates_lag + gwf_leadermil + ll_rgdpe_PW + 
               lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(rechattyrs, 3), 
             data = data_new, family = "binomial")

# Generate main comparison table
stargazer(m2restricted, m2new, m5restricted, m5new, m8restricted, m8new, 
          type = "latex",
          se = list(
            sqrt(diag(vcovHC(m2restricted, type = "HC1", cluster = data_used_in_models$cowcode))),
            sqrt(diag(vcovHC(m2new, type = "HC1", cluster = data_new$cowcode))),
            sqrt(diag(vcovHC(m5restricted, type = "HC1", cluster = data_used_in_models$cowcode))),
            sqrt(diag(vcovHC(m5new, type = "HC1", cluster = data_new$cowcode))),
            sqrt(diag(vcovHC(m8restricted, type = "HC1", cluster = data_used_in_models$cowcode))),
            sqrt(diag(vcovHC(m8new, type = "HC1", cluster = data_new$cowcode)))
          ),
          column.labels = c("Restricted", "Replication", "Restricted", "Replication", 
                            "Restricted", "Replication"),
          star.cutoffs = c(0.1, 0.05, 0.01), 
          notes.append = FALSE)

# --- FALSE POSITIVE ANALYSIS -------------------------------------------------
# Create false positive indicator
data_used_in_models <- data_used_in_models %>%
  mutate(
    dyn_quartile = ntile(dyn.estimates_lag, 4),
    false_positive = ifelse(gwf_legislature == 1 & dyn_quartile == 1, 1, 0)
  )

# False positive models
m_fp <- glm(anycoupatt_fin ~ false_positive + gwf_leadermil + ll_rgdpe_PW + 
              lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(anyattyrs, 3), 
            data = data_used_in_models, family = "binomial")

m_fp_reshuffle <- glm(reshuffleatt_fin ~ false_positive + gwf_leadermil + ll_rgdpe_PW + 
                        lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(shuffattyrs, 3), 
                      data = data_used_in_models, family = "binomial")

m_fp_rechange <- glm(rechangeatt_fin ~ false_positive + gwf_leadermil + ll_rgdpe_PW + 
                       lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(rechattyrs, 3), 
                     data = data_used_in_models, family = "binomial")

# Clustered SEs for false positive models
se_fp_coup <- sqrt(diag(vcovHC(m_fp, type = "HC1", cluster = data_used_in_models$cowcode)))
se_fp_reshuffle <- sqrt(diag(vcovHC(m_fp_reshuffle, type = "HC1", cluster = data_used_in_models$cowcode)))
se_fp_rechange <- sqrt(diag(vcovHC(m_fp_rechange, type = "HC1", cluster = data_used_in_models$cowcode)))

stargazer(m_fp, m_fp_reshuffle, m_fp_rechange,
          type = "latex",
          title = "Effect of Symbolic Legislatures (False Positives) on Coup-Related Outcomes",
          column.labels = c("Coup Attempt", "Cabinet Reshuffle Attempt", "Leadership Change Attempt"),
          se = list(se_fp_coup, se_fp_reshuffle, se_fp_rechange),
          star.cutoffs = c(0.1, 0.05, 0.01),
          dep.var.labels.include = FALSE,
          model.names = FALSE)

# --- ROBUSTNESS ANALYSIS -----------------------------------------------------
# Leave-one-out analysis
coef_m2new <- leave_one_out(anycoupatt_fin ~ dyn.estimates_lag + gwf_leadermil + ll_rgdpe_PW + 
                              lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(anyattyrs, 3), data_new)

coef_m5new <- leave_one_out(reshuffleatt_fin ~ dyn.estimates_lag + gwf_leadermil + ll_rgdpe_PW + 
                              lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(shuffattyrs, 3), data_new)

coef_m8new <- leave_one_out(rechangeatt_fin ~ dyn.estimates_lag + gwf_leadermil + ll_rgdpe_PW + 
                              lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(rechattyrs, 3), data_new)

# Extract original coefficients
orig_coef_m2new <- coef(m2new)["dyn.estimates_lag"]
orig_coef_m5new <- coef(m5new)["dyn.estimates_lag"]
orig_coef_m8new <- coef(m8new)["dyn.estimates_lag"]

# --- VISUALIZATION -----------------------------------------------------------
par(mfrow = c(1, 3))
compare_coefficients(coef_m2new, orig_coef_m2new, "All Coups")
compare_coefficients(coef_m5new, orig_coef_m5new, "Reshuffling Coups")
compare_coefficients(coef_m8new, orig_coef_m8new, "Regime-Change Coups")
par(mfrow = c(1, 1))